1 - Visualizing SGG metadata

sample_data(ps.b.r)$Site.name = factor(sample_data(ps.b.r)$Site.name, 
                                          levels = c("Blaenavon","Cefn Hengoed","Mountain Gate","Taff Bargoed",
                                                     "Glyncastle","Dinas","Ynysarwed","Celynen North","Morlais",
                                                     "Lindsay","Six Bells","Crumlin Navigation","Taffs Well"))
metadata<-sample_data(ps.b.r)
#Temp
temp_table.plot<-ggplot(metadata, 
                        aes(x=Site.name, y=Temp.C, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_hline(yintercept = 13.4,
             color="dark red",
             linetype="dashed") +
  geom_hline(yintercept = 11,
             color="gray",
             linetype="dashed") +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=12),
        legend.position = "bottom",
        legend.title = element_text(face = "bold"),
        legend.background = element_rect(fill="gray95", size=.5, linetype="dotted")) +
  annotate("text", "Mountain Gate", 14, vjust = -1, hjust=.45, size= 4,
           label = "Average mine water temperature (Farr et al, 2016)") +
  labs(y=expression(paste("Temperature (",degree,"C)")))
#DO
do.plot<-ggplot(metadata, 
                        aes(x=Site.name, y=`DO..sat`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_line(aes(group=Month, color=Month),
            stat="smooth", method="loess", 
            alpha = 0.4, size=1.5) +
  geom_hline(yintercept = 50,
           color="gray60",
           linetype="dashed") +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=12),
        legend.position = "none") +
  labs(y="Dissolved Oxygen (%)")
#Sulfate
so4.plot<-ggplot(metadata, 
                        aes(x=Site.name, y=`SO4..µg.ml`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_line(aes(group=Month, color=Month),
            stat="smooth", method="loess", 
            alpha = 0.4, size=1.5) +
  geom_hline(yintercept = 500,
         color="gray60",
         linetype="dashed") +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=12),
        legend.position = "none") +
  labs(y=expression(paste("SO"[4]^{-2},"(",mu,"g.mL"^{-1},")"))) +
  guides(size=FALSE)
#pH
pH.plot<-ggplot(metadata, 
                        aes(x=Site.name, y=`pH`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_line(aes(group=Month, color=Month),
            stat="smooth", method="loess", 
            alpha = 0.4, size=1.5) +
  geom_hline(yintercept = 7,
           color="gray60",
           linetype="dashed") +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=12),
        legend.position = "none") +
  labs(y="pH") +
  guides(size=FALSE)
#EC
ec.plot<-ggplot(metadata, 
                        aes(x=Site.name, y=`EC.uS.cm`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_line(aes(group=Month, color=Month),
            stat="smooth", method="loess", 
            alpha = 0.4, size=1.5) +
  geom_hline(yintercept = 2000,
           color="gray60",
           linetype="dashed") +
  geom_hline(yintercept = 1000,
           color="grey80",
           linetype="dashed") +
  scale_y_continuous(breaks=c(0,500,1000,1500,2000,2500,3000)) +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_text(angle = 25, hjust = 1, size= 14),
        axis.text.y = element_text(size=12),
        legend.position = "top",
        legend.title = element_text(face = "bold"),
        legend.background = element_rect(fill="gray95", size=.5, linetype="dotted")) +
  labs(y=expression(paste("EC (",mu,"S.cm"^{-1},")")))
plot_grid(temp_table.plot, do.plot, so4.plot, pH.plot, ec.plot, ncol = 1, align='v', rel_heights = c(1.75,1,1,1,1.75))

And does sulfate ‘more or less’ negatively correlate with DO just to make sure?

do.so4.plot<-ggplot(metadata, 
                        aes(x=Site.name)) +
  geom_point(aes(y=`DO..sat`,
                 fill="Dissolved Oxygen", color="Dissolved Oxygen",
                 shape=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_point(aes(y=`SO4..µg.ml`/12,
                 fill="Sulfate", color="Sulfate",
                 shape=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  geom_line(aes(y=`DO..sat`,
                group=Month, 
                color="Dissolved Oxygen"),
            stat="smooth", method="loess", 
            alpha = 0.4, size=1.5) +
  geom_line(aes(y=`SO4..µg.ml`/12,
                group=Month, 
                color="Sulfate"),
        stat="smooth", method="loess", 
        alpha = 0.4, size=1.5) +
  scale_colour_manual(values = c("dodgerblue1", "firebrick2")) +
  scale_y_continuous(sec.axis = sec_axis(~.*12, name = expression(paste("SO"[4]^{-2},"(",mu,"g.mL"^{-1},")")))) +
  labs(y="Dissolved Oxygen (%)",
       colour = "Parameter") +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_text(angle=35, hjust = 1,vjust = 1),
        axis.text.y = element_text(size=12),
        legend.position = "top",
        legend.justification = "center",
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 13, face = "bold"),
        legend.direction = "vertical") +
  guides(fill = FALSE)
do.so4.plot

Yep-ish.

But how does the overal picture look regarding how variables relate to one another?

#remove all columns with one or more NA entry
#NA in DO substituted to the mean of the two values in the site
sgg_metadata<-as(sample_data(ps.b.r),"data.frame")
sgg_metadata$DO..sat[is.na(sgg_metadata$DO..sat)] <- 44.1
sgg_metadata_noNA = sgg_metadata[ , colSums(is.na(sgg_metadata)) == 0]
colnames(sgg_metadata_noNA) <- c("Sample_Code","Sample.ID","Sample_type","Site.ID","Site.name","Time_point","Month","Batch","Date", "Time",
                                 "Temperature","EC","DO","K","Na","Cl","SO4","6_Li","7_Li","7_Li_He","B","Sc","Mn","Co","Ni","Ga","As","Se",
                                 "Rb","Sr","Y","Te","Cs","Ba","La","Nd","Eu","Er","Lu","U")
sgg_metadata_noNA_april<-subset(sgg_metadata_noNA, 
                                                  Month %in% c("April"))
sgg_metadata_noNA_august<-subset(sgg_metadata_noNA, 
                                                  Month %in% c("August"))
sgg_metadata_noNA_december<-subset(sgg_metadata_noNA, 
                                                  Month %in% c("December"))
cols_to_go_april <- names(sgg_metadata_noNA_april) %in% c("Sample_Code", "Sample.ID", "Sample_type","Site.ID",
                                                "Site.name","Time_point","Month","Batch",
                               "Date","Time")
cols_to_go_august <- names(sgg_metadata_noNA_august) %in% c("Sample_Code", "Sample.ID", "Sample_type","Site.ID",
                                                        "Site.name","Time_point","Month","Batch",
                               "Date","Time")
cols_to_go_december <- names(sgg_metadata_noNA_december) %in% c("Sample_Code", "Sample.ID", "Sample_type","Site.ID",
                                                            "Site.name","Time_point","Month","Batch",
                               "Date","Time")
sgg_metadata_noNA_april_f <- sgg_metadata_noNA_april[!cols_to_go_april]
sgg_metadata_noNA_august_f <- sgg_metadata_noNA_august[!cols_to_go_august]
sgg_metadata_noNA_december_f <- sgg_metadata_noNA_december[!cols_to_go_december]

april_corr<-ggcorr(sgg_metadata_noNA_april_f, method = c("all.obs", "spearman"),
                   palette = "RdBu",
                   nbreaks = 10, hjust = 1, 
                   size=4, layout.exp = 6) + 
                   ggtitle("April")
august_corr<-ggcorr(sgg_metadata_noNA_august_f, method = c("all.obs", "spearman"), 
                    palette = "RdBu",
                    nbreaks = 10, hjust = 1, 
                    size=4, layout.exp = 6) + 
                    ggtitle("August")
december_corr<-ggcorr(sgg_metadata_noNA_december_f, method = c("all.obs", "spearman"), 
                      palette = "RdBu",
                      nbreaks = 10, hjust = 1, 
                      size=4, layout.exp = 6) + 
                      ggtitle("December")

Pearson assumes linearity between variables as well as normal distributions of data and is sensible to transformation. Spearman is a nonparametric measure of rank correlation as Kendall is, meaning they don’t care about transformations. Kendall meaures strength of dependence between two variables. Spearman does not make assumptions of data distribution and measures degree of association between two variables.

There’s clearly a lot of variation, but definitely there’s loads more to learn on how these variables relate.

2 - Does SGG Sulfate data coincide with Coal Authority data from previous years?

swales_ca_metadata<-read.csv("/media/andre/B2F8C9A0F8C962E9/SGG_16S_analysis/SGG_16S_metadata/R_previous_BGS_chem_data_SWales.csv", header=TRUE)
swales_ca_metadata$Site = factor(swales_ca_metadata$Site, 
                                          levels = c("Blaenavon","Cefn Hengoed","Mountain Gate","Taff Bargoed",
                                                     "Glyncastle","Dinas","Ynysarwed","Celynen North","Morlais",
                                                     "Lindsay","Six Bells","Crumlin Navigation","Taffs Well"))
swales_ca_metadata$Month = factor(swales_ca_metadata$Month, 
                                          levels = c("January","February","March","April","May","June","July",
                                                     "August","September","October","November","December"))
#Sulfate from CA
swales_ca_metadata_plot<-ggplot(swales_ca_metadata, 
                        aes(x=Site, y=`SO4.ug.mL`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  facet_wrap(~Year, ncol = 1) +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_text(angle = 35, hjust = 1, vjust = 1),
        axis.text.y = element_text(size=12),
        legend.position = "bottom",
        legend.justification = "center",
        legend.text=element_text(size=7)) +
  guides(fill = guide_legend(nrow = 1),
         color = guide_legend(nrow = 1)) +
  scale_y_continuous(expand = c(0,0),
                     breaks = c(0,250,500,750,1000,1250)) +
  labs(y=expression(paste("SO"[4]^{-2},"(",mu,"g.mL"^{-1},")")))

swales_ca_metadata_2016<-subset(swales_ca_metadata, 
                                                  Year %in% c("2016"))

swales_ca_metadata_2016_plot<-ggplot(swales_ca_metadata_2016, 
                        aes(x=Site, y=`SO4.ug.mL`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  theme(axis.title.x=element_blank(),
        axis.title.y = element_text(size=14),
        axis.text.x = element_text(angle = 25, hjust = 1, vjust = 1, size= 12),
        axis.text.y = element_text(size=12),
        legend.position = "bottom",
        legend.justification = "center",
        legend.text=element_text(size=8)) +
  guides(fill = guide_legend(nrow = 1),
         color = guide_legend(nrow = 1)) +
  scale_y_continuous(breaks = c(0,250,500,750,1000)) +
  labs(y=expression(paste("SO"[4]^{-2},"(",mu,"g.mL"^{-1},")"))) +
  ggtitle("CA data - 2016")

#quick prep of our own data
so4.plot.cowplot=so4.plot + 
  theme(legend.position = "bottom",
        legend.justification = "center",
        axis.text.x = element_text(angle = 25, hjust = 1, size= 12)) +
  scale_y_continuous(breaks = c(0,250,500,750,1000,1250)) +
  ggtitle("SGG data - 2016")
swales_ca_metadata_plot + ggtitle("Sulfate through the years - CA")

How does CA’s 2016 data compare with ours though?

plot_grid(swales_ca_metadata_2016_plot, so4.plot.cowplot,
          align = 'v', ncol = 1)

3 - According to CA data, how did iron do across our sites in 2016?

iron_theme=theme(
        axis.title.x=element_blank(),
        axis.text.x = element_text(angle = 25, hjust = 1, vjust = 1, size= 12),
        axis.text.y = element_text(size=8),
        axis.title.y = element_text(angle = 0, size=10),
        legend.title = element_text(size=8, face = "bold"))

KONEFE_Fe_mgL_plot<-ggplot(swales_ca_metadata_2016, 
                        aes(x=Site, y=`KONEFE_Fe_mgL`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  guides(fill = guide_legend(nrow = 1),
         color = guide_legend(nrow = 1)) +
  labs(y=expression(atop("KONEFE Iron as Fe",paste("(mg.L"^{-1},")")))) +
  iron_theme +
  theme(legend.position = "bottom",
        legend.justification = "left",
        legend.text=element_text(size=8),
        axis.text.x = element_blank())

D_Fe_mgL_plot<-ggplot(swales_ca_metadata_2016, 
                        aes(x=Site, y=`D_Fe_mgL`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  guides(fill = guide_legend(nrow = 1),
         color = guide_legend(nrow = 1)) +
  labs(y=expression(atop("Iron as Fe (Dissolved)", paste("(mg.L"^{-1},")")))) +
  iron_theme +
  theme(legend.position = "none",
        axis.text.x = element_blank())

ICPWATVAR_Fe_mgL_plot<-ggplot(swales_ca_metadata_2016, 
                        aes(x=Site, y=`ICPWATVAR_Fe_mgL`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  guides(fill = guide_legend(nrow = 1),
         color = guide_legend(nrow = 1)) +
  labs(y=expression(atop("Iron as Fe (Total) ICPWATVAR", paste("(mg.L"^{-1},")")))) +
  iron_theme +
  theme(legend.position = "bottom",
        legend.justification = "left",
        legend.text=element_text(size=8),
        axis.text.x = element_blank())

Fe2_mgL_plot<-ggplot(swales_ca_metadata_2016, 
                        aes(x=Site, y=`Fe2_mgL`, fill=Month)) +
  geom_point(aes(fill=Month, color=Month),
             size=2,
             position = position_dodge(width=0.5)) +
  guides(fill = guide_legend(nrow = 1),
         color = guide_legend(nrow = 1)) +
  labs(y=expression(atop("Ferrous Iron as Fe"^{2+""}, paste("(mg.L"^{-1},")")))) +
  iron_theme +
  theme(legend.position = "none")
plot_grid(KONEFE_Fe_mgL_plot, D_Fe_mgL_plot, ICPWATVAR_Fe_mgL_plot, Fe2_mgL_plot,
          align = 'v', ncol = 1, 
          rel_heights = c(0.1,0.1,0.1,0.1))

So, in 2016, while most of the other sites were close to 0, it seems a LOT of iron was being detected in Ynysarwed. Are there taxonomical evidences of more iron users there? Not really as seen on SGG-GenTaxonomy.html.

4 - How do environmental variables relate to microbial community structure in this dataset?

An adonis test tests “how is variation attributed to different experimental” variables, using distance matrices. It analyses how variables can be sources of variation of distance matrices and depends on fitting linear models to distance matrices. AKA PERMANOVA.

Null hypothesis: tested metadata categories have the same centroid.

metadata<-as(sample_data(ps.b.r),"data.frame")
metadata$DO..sat[is.na(metadata$DO..sat)] <- 44.1
metadata_noNA = metadata[ , colSums(is.na(metadata)) == 0]

ps_sig <- phyloseq(otu_table(SGG_SVt, taxa_are_rows=FALSE),
                   sample_data(metadata_noNA), 
                   tax_table(SGG_Tax))
sample_data(ps_sig)$Month = factor(sample_data(ps_sig)$Month, 
                                   levels = c("April","August","December","Control"))
ps_sig_ntf = subset_samples(ps_sig, Site.name != "Taffs Well PUMPED")
ps_sig_ntf_nc = subset_samples(ps_sig_ntf, Sample_type != "Control")
ps_sig_ntf_nc_noab = subset_taxa(ps_sig_ntf_nc, Kingdom %in% c("Archaea", "Bacteria"))
ps_cap_r = transform_sample_counts(ps_sig_ntf_nc_noab, function(x) x/sum(x))

set.seed(1)
# Calculate Jensen-Shannon distance matrix
ps_cap_r_jsd <- phyloseq::distance(ps_cap_r, method = "jsd")
# make a data frame from the sample_data
sampledf_cap <- data.frame(sample_data(ps_cap_r))

adonis(ps_cap_r_jsd ~ Site.name + Temp.C + Month + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X78.Se.ng.ml + X85.Rb.ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X125.Te.ng.ml + X133.Cs.ng.ml + X135.Ba.ng.ml + X139.La.ng.ml + X146.Nd.ng.ml + X153.Eu.ng.ml + X166.Er.ng.ml + X175.Lu.ng.ml + X238.U.ng.ml, 
       data = sampledf_cap, strata = sampledf_cap$Time_point, permutations=1e3)
## 
## Call:
## adonis(formula = ps_cap_r_jsd ~ Site.name + Temp.C + Month +      EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml +      SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml +      X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml +      X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X78.Se.ng.ml +      X85.Rb.ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X125.Te.ng.ml +      X133.Cs.ng.ml + X135.Ba.ng.ml + X139.La.ng.ml + X146.Nd.ng.ml +      X153.Eu.ng.ml + X166.Er.ng.ml + X175.Lu.ng.ml + X238.U.ng.ml,      data = sampledf_cap, permutations = 1000, strata = sampledf_cap$Time_point) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 1000
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model      R2   Pr(>F)    
## Site.name          12   17.2135 1.43446  56.971 0.82991 0.000999 ***
## Temp.C              1    0.1026 0.10264   4.076 0.00495 0.000999 ***
## Month               2    0.1955 0.09774   3.882 0.00942 0.000999 ***
## EC.uS.cm            1    0.0528 0.05277   2.096 0.00254 0.020979 *  
## DO..sat             1    0.0989 0.09888   3.927 0.00477 0.000999 ***
## K.µg.ml             1    0.0356 0.03561   1.414 0.00172 0.174825    
## Na.µg.ml            1    0.0390 0.03897   1.548 0.00188 0.108891    
## Cl..µg.ml           1    0.1391 0.13907   5.523 0.00670 0.000999 ***
## SO4..µg.ml          1    0.0638 0.06384   2.535 0.00308 0.011988 *  
## X6.Li.ng.ml         1    0.0482 0.04822   1.915 0.00232 0.039960 *  
## X7.Li.ng.ml         1    0.0404 0.04041   1.605 0.00195 0.087912 .  
## X7.Li..He..ng.ml    1    0.0187 0.01868   0.742 0.00090 0.685315    
## X11.B.ng.ml         1    0.0280 0.02796   1.111 0.00135 0.359640    
## X45.Sc.ng.ml        1    0.0492 0.04917   1.953 0.00237 0.044955 *  
## X55.Mn.ng.ml        1    0.0833 0.08326   3.307 0.00401 0.001998 ** 
## X59.Co.ng.ml        1    0.0429 0.04287   1.703 0.00207 0.086913 .  
## X60.Ni.ng.ml        1    0.0468 0.04678   1.858 0.00226 0.056943 .  
## X69.Ga.ng.ml        1    0.0192 0.01919   0.762 0.00093 0.689311    
## X75.As..He..ng.ml   1    0.0325 0.03249   1.290 0.00157 0.223776    
## X78.Se.ng.ml        1    0.0364 0.03640   1.446 0.00176 0.165834    
## X85.Rb.ng.ml        1    0.0564 0.05639   2.240 0.00272 0.015984 *  
## X88.Sr.ng.ml        1    0.0360 0.03599   1.429 0.00174 0.156843    
## X89.Y.ng.ml         1    0.1430 0.14302   5.680 0.00690 0.000999 ***
## X125.Te.ng.ml       1    0.0204 0.02038   0.809 0.00098 0.642358    
## X133.Cs.ng.ml       1    0.0140 0.01401   0.557 0.00068 0.874126    
## X135.Ba.ng.ml       1    0.0406 0.04056   1.611 0.00196 0.100899    
## X139.La.ng.ml       1    0.1084 0.10840   4.305 0.00523 0.000999 ***
## X146.Nd.ng.ml       1    0.0288 0.02881   1.144 0.00139 0.297702    
## X153.Eu.ng.ml       1    0.0219 0.02194   0.871 0.00106 0.544456    
## X166.Er.ng.ml       1    0.0188 0.01884   0.748 0.00091 0.710290    
## X175.Lu.ng.ml       1    0.0171 0.01708   0.678 0.00082 0.727273    
## X238.U.ng.ml        1    0.0369 0.03685   1.464 0.00178 0.125874    
## Residuals          72    1.8129 0.02518         0.08740             
## Total             116   20.7414                 1.00000             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Site ID significantly explains >80% of the variation. Does this change with a normalised metadata table?

norm_metadata<-read.csv("/media/andre/B2F8C9A0F8C962E9/SGG_16S_analysis/SGG_16S_metadata/SGG_metadata_norm.csv", header=TRUE)
#does this change with a normalised metadata table?
normalize <- function(x) {
    return ((x - min(x)) / (max(x) - min(x)))
}
norm_metadata$Sample_Code<-as.numeric(norm_metadata$Sample_Code)
norm_metadata_s <- norm_metadata[order(norm_metadata$Sample_Code),] 
norm_metadata_s$DO..sat[is.na(norm_metadata_s$DO..sat)] <- 44.1
sampledf_cap_norm <- as.data.frame(lapply(norm_metadata_s, normalize))
sampledf_cap_norm_noNA = sampledf_cap_norm[ , colSums(is.na(sampledf_cap_norm)) == 0]
#remove Sample_Code, substitute with originals
cols_to_go_sampledf_cap_norm_noNA <- names(sampledf_cap_norm_noNA) %in% c("Sample_Code")
sampledf_cap_norm_noNA_f <- sampledf_cap_norm_noNA[!cols_to_go_sampledf_cap_norm_noNA]
sampledf_cap_norm_noNA_f$Sample_Code <- norm_metadata_s$Sample_Code
sampledf_cap_norm_noNA_f <- sampledf_cap_norm_noNA_f %>% select(Sample_Code, everything())

order_jsd<-labels(ps_cap_r_jsd)
sampledf_cap_norm_noNA_f_ord<-sampledf_cap_norm_noNA_f[match(order_jsd, sampledf_cap_norm_noNA_f$Sample_Code),]
rownames(sampledf_cap_norm_noNA_f_ord) <- sampledf_cap_norm_noNA_f_ord$Sample_Code

norm_adonis = adonis(ps_cap_r_jsd ~ Site.ID + Temp.C + Month + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X78.Se.ng.ml + X85.Rb.ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X125.Te.ng.ml + X133.Cs.ng.ml + X135.Ba.ng.ml + X139.La.ng.ml + X146.Nd.ng.ml + X153.Eu.ng.ml + X166.Er.ng.ml + X175.Lu.ng.ml + X238.U.ng.ml, 
       data = sampledf_cap_norm_noNA_f_ord, permutations=1e3)

#Choose the 5 variables explaining the most variation
norm_adonis_df=as(norm_adonis$aov.tab, "data.frame")
head(norm_adonis_df[order(-norm_adonis_df$R2),], n=7)
## Permutation: free
## Number of permutations: 1000
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs MeanSqs F.Model      R2   Pr(>F)    
## Total     116   20.7414                 1.00000             
## Residuals  84    2.7196 0.03238         0.13112             
## Temp.C      1    2.3025 2.30254  71.119 0.11101 0.000999 ***
## EC.uS.cm    1    2.1577 2.15772  66.646 0.10403 0.000999 ***
## Site.ID     1    1.8927 1.89267  58.459 0.09125 0.000999 ***
## K.µg.ml     1    1.3659 1.36591  42.189 0.06585 0.000999 ***
## Na.µg.ml    1    1.3646 1.36462  42.149 0.06579 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

It does! Only Temperature and EC significantly explain more than 10% of variation. Also, loads more variables seem to significantly correlate to microbial community structure!

Testing these two variables then, to understand if their adonis results are significant:

Homogeneity of dispersion test for significant categories:

Null hypothesis: tested metadata categories have the same dispersions (variance). betadisper calculates average distances of samples to group centroids (from metadata categories) permutest calculates ANOVAS on average distances from betadisper() to test if one or more groups is more variable than others

#Temperature
beta_t <- betadisper(ps_cap_r_jsd, sampledf_cap_norm_noNA_f_ord$Temp.C)
permutest(beta_t)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq  Mean Sq      F N.Perm Pr(>F)  
## Groups    38 1.1885 0.031276 1.8728    999  0.046 *
## Residuals 78 1.3026 0.016700                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#EC
beta_ec <- betadisper(ps_cap_r_jsd, sampledf_cap_norm_noNA_f_ord$EC.uS.cm)
permutest(beta_ec)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df   Sum Sq    Mean Sq      F N.Perm Pr(>F)
## Groups    105 0.309564 0.00294823 768.85    999  0.573
## Residuals  11 0.000042 0.00000383

This means group centroids for temperature and EC are not significantly shared, thus meaning that the adonis test was meaningful.

CAP with normalised metadata.

Using all significant variables for the formula, plotting only the ones explaining 5% or more variation.

#remove rows 73,74,75 (Taff's Well pumped samples) from norm_metadata and create new phyloseq object with normalised metadata
remove = c("73","74","75")
sampledf_cap_norm_noNA_f_ord_final <- sampledf_cap_norm_noNA_f_ord[!rownames(sampledf_cap_norm_noNA_f_ord) %in% remove, ]

sampledf_cap_norm_noNA_f_ord_final$Month_cat <- sampledf_cap$Month
sampledf_cap_norm_noNA_f_ord_final$Site.name <- sampledf_cap$Site.name

ps_cap_r_norm <- phyloseq(otu_table(ps_cap_r),
                       tax_table(ps_cap_r),
                       sample_data(sampledf_cap_norm_noNA_f_ord_final))

#CAP with significant normalised variables
CAP_ps_cap_r_norm <- ordinate(ps_cap_r_norm,
                           method = "CAP",
                           distance = "jsd",
                           k=3, trymax=1e3, weighted=TRUE,
                           formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)

CAP_norm_sig_plot_a12 <- plot_ordination(ps_cap_r_norm,
                                    ordination = CAP_ps_cap_r_norm, color = "Site.name", axes = c(1,2)) +
  aes(shape = Month_cat) +
  geom_point(aes(colour = Site.name), alpha = 0.7, size = 4) +
  geom_polygon(aes(fill=Site.name), alpha=0.5) +
  geom_point(aes(colour = Site.name), size = 1.5)+
  scale_color_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13)) +
  scale_fill_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13)) +
  theme(legend.position = "none")

arrowmat_CAP_norm <- vegan::scores(CAP_ps_cap_r_norm, display = "bp")
arrowmat_CAP_norm<-subset(arrowmat_CAP_norm,
                           rownames(arrowmat_CAP_norm) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm <- data.frame(labels = rownames(arrowmat_CAP_norm), arrowmat_CAP_norm)
rownames(arrowdf_CAP_norm) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm$labels <- c("Site ID","Temperature","EC","K","Na")

arrow_map_CAP_norm <- aes(xend = CAP1, yend = CAP2,
                     x = 0, y = 0,
                     shape = NULL, color = NULL,
                     label = labels)
label_map_CAP_norm <- aes(x = 1.1 * CAP1, y = 1.3 * CAP2,
                     shape = NULL, color = NULL,
                     label = labels)
arrowhead_CAP_norm = arrow(length = unit(0.02, "npc"))

CAP_norm_sig_plot_a12 = CAP_norm_sig_plot_a12 +
  geom_segment(mapping = arrow_map_CAP_norm, size = .5, 
               data = arrowdf_CAP_norm, color = "gray", arrow = arrowhead_CAP_norm) +
  geom_text(mapping = label_map_CAP_norm, size = 4, 
            data = arrowdf_CAP_norm, show.legend = FALSE)

arrowmat_CAP_norm_13 <- vegan::scores(CAP_ps_cap_r_norm, display = "bp", choices=c(1,3))
arrowmat_CAP_norm_13<-subset(arrowmat_CAP_norm_13,
                           rownames(arrowmat_CAP_norm_13) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_13 <- data.frame(labels = rownames(arrowmat_CAP_norm_13), arrowmat_CAP_norm_13)
rownames(arrowdf_CAP_norm_13) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_13$labels <- c("Site ID","Temperature","EC","K","Na")

arrow_map_CAP_norm_13 <- aes(xend = CAP1, yend = CAP3,
                        x = 0, y = 0,
                        shape = NULL, color = NULL,
                        label = labels)
label_map_CAP_norm_13 <- aes(x = 1.1 * CAP1, y = 1.3 * CAP3,
                        shape = NULL, color = NULL,
                        label = labels)
arrowhead_CAP_norm_13 = arrow(length = unit(0.02, "npc"))

CAP_norm_sig_plot_a13 <- plot_ordination(ps_cap_r_norm,
                                    ordination = CAP_ps_cap_r_norm, color = "Site.name",axes = c(1,3)) +
  aes(shape = Month_cat) +
  geom_point(aes(colour = Site.name), alpha = 0.7, size = 4) +
  geom_polygon(aes(fill=Site.name), alpha=0.5) +
  geom_point(aes(colour = Site.name), size = 1.5) +
  geom_segment(mapping = arrow_map_CAP_norm_13, size = .5,
               data = arrowdf_CAP_norm_13, color = "gray", arrow = arrowhead_CAP_norm_13) +
  geom_text(mapping = label_map_CAP_norm_13, size = 4,
            data = arrowdf_CAP_norm_13, show.legend = FALSE)+
  scale_color_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13)) +
  scale_fill_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13))

ps.b.r.CAP_norm_sig.plot<-plot_grid(CAP_norm_sig_plot_a12, CAP_norm_sig_plot_a13,
                               labels=c("A","B"))
ps.b.r.CAP_norm_sig.plot

So, as previously seen, two major groups composed of high-S and high-Fe samples. Temperature seems to play a role in defining high-S sample groups, but this changes throughout the year.

Is this ordination’s goodness of fit better than random models? i.e. are the constraints in this model significant?

anova(CAP_ps_cap_r_norm, perm=1e3)
## Permutation test for capscale under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: capscale(formula = distance ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml, data = data, k = 3, trymax = 1000, weighted = TRUE)
##          Df SumOfSqs      F Pr(>F)    
## Model    23  17.5620 22.335  0.001 ***
## Residual 93   3.1794                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As before, but now what microbial groups are influenced by these variables? More ordination on the way.

ps_cap_r_norm_april = subset_samples(ps_cap_r_norm, Month_cat == "April")
ps_cap_r_norm_april_f = filter_taxa(ps_cap_r_norm_april, function(x) sum(x) > .01, TRUE)
ps_cap_r_norm_august = subset_samples(ps_cap_r_norm, Month_cat == "August")
ps_cap_r_norm_august_f = filter_taxa(ps_cap_r_norm_august, function(x) sum(x) > .01, TRUE)
ps_cap_r_norm_december = subset_samples(ps_cap_r_norm, Month_cat == "December")
ps_cap_r_norm_december_f = filter_taxa(ps_cap_r_norm_december, function(x) sum(x) > .01, TRUE)

CAP_norm_ps_cap_r_norm_april_f <- ordinate(ps_cap_r_norm_april_f,
                           method = "CAP",
                           distance = "jsd",
                           k=3, trymax=1e3, weighted=TRUE,
                           formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)

CAP_norm_ps_cap_r_norm_august_f <- ordinate(ps_cap_r_norm_august_f,
                           method = "CAP",
                           distance = "jsd",
                           k=3, trymax=1e3, weighted=TRUE,
                           formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)

CAP_norm_ps_cap_r_norm_december_f <- ordinate(ps_cap_r_norm_december_f,
                           method = "CAP",
                           distance = "jsd",
                           k=3, trymax=1e3, weighted=TRUE,
                           formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)

#april
arrowmat_CAP_norm_taxa_april <- vegan::scores(CAP_norm_ps_cap_r_norm_april_f, display = "bp")
arrowmat_CAP_norm_taxa_april<-subset(arrowmat_CAP_norm_taxa_april,
                           rownames(arrowmat_CAP_norm_taxa_april) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_taxa_april <- data.frame(labels = rownames(arrowmat_CAP_norm_taxa_april), arrowmat_CAP_norm_taxa_april)
rownames(arrowmat_CAP_norm_taxa_april) <- c("Site ID","Temperature","EC","K","Na")
rownames(arrowdf_CAP_norm_taxa_april) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_taxa_april$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_taxa_april <- aes(xend = CAP1, yend = CAP2,
                     x = 0, y = 0,
                     shape = NULL, color = NULL,
                     label = labels)
label_map_CAP_norm_taxa_april <- aes(x = 1.1 * CAP1, y = 1.1 * CAP2,
                     shape = NULL, color = NULL,
                     label = labels)
arrowhead_CAP_norm_taxa_april = arrow(length = unit(0.02, "npc"))

april_phyla=nrow(as.data.frame(table(tax_table(ps_cap_r_norm_april_f)[, "Phylum"], exclude = NULL)))

CAP_labels_april <- plot_ordination(ps_cap_r_norm_april_f,
                                    ordination = CAP_norm_ps_cap_r_norm_april_f, 
                                    axes = c(1,2), type="taxa")
CAP_april_xlabel <- CAP_labels_april$labels$x
CAP_april_ylabel <- CAP_labels_april$labels$y

CAP_norm_april_taxa_sig_plot_a12 <- plot_ordination(ps_cap_r_norm_april_f,
                                    ordination = CAP_norm_ps_cap_r_norm_april_f, 
                                    axes = c(1,2), type="taxa", justDF = TRUE) 
CAP_norm_april_taxa_sig_plot_a12$Phylum=as.character(CAP_norm_april_taxa_sig_plot_a12$Phylum)
CAP_norm_april_taxa_sig_plot_a12$Class=as.character(CAP_norm_april_taxa_sig_plot_a12$Class)
ov_CAP_norm_april_taxa_sig_plot_a12<-subset(CAP_norm_april_taxa_sig_plot_a12,
                           Phylum != "Proteobacteria")
ov_CAP_norm_april_taxa_sig_plot_a12$Phylum=as.character(ov_CAP_norm_april_taxa_sig_plot_a12$Phylum)
class_ov_CAP_norm_april_taxa_sig_plot_a12<-subset(CAP_norm_april_taxa_sig_plot_a12,
                           Class %in% c("Epsilonproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria"))
class_ov_CAP_norm_april_taxa_sig_plot_a12$Class=as.character(class_ov_CAP_norm_april_taxa_sig_plot_a12$Class)

april_class <- length(unique(class_ov_CAP_norm_april_taxa_sig_plot_a12$Class))

final_CAP_norm_april_taxa_sig_plot_a12 = ggplot(data=CAP_norm_april_taxa_sig_plot_a12,
                                                aes(x=CAP1,y=CAP2)) +
  geom_point(data=ov_CAP_norm_april_taxa_sig_plot_a12,
             aes(x=CAP1, y=CAP2,
                 color = Phylum, size = 4)) +
  geom_point(data = class_ov_CAP_norm_april_taxa_sig_plot_a12,
             aes(x=CAP1, y=CAP2,
                 color = Phylum,
                 shape = Class,
                 alpha = 1, size = 4)) +
  geom_segment(mapping = arrow_map_CAP_norm_taxa_april, size = .5, 
               data = arrowdf_CAP_norm_taxa_april, 
               color = "black", arrow = arrowhead_CAP_norm_taxa_april) +
  geom_text(mapping = label_map_CAP_norm_taxa_april, 
            data = subset(arrowdf_CAP_norm_taxa_april,
                          labels %in% c("Site ID", "Temperature","K","Na")),
            size = 5, fontface="bold",
            hjust = 0,
            show.legend = FALSE) +
  geom_text(mapping = label_map_CAP_norm_taxa_april, 
            data = subset(arrowdf_CAP_norm_taxa_april,
                          labels %in% c("EC")),
            size = 5, fontface="bold",
            show.legend = FALSE,
            nudge_y = 0.05) +
  scale_color_manual(values=distinctColorPalette(length(unique(CAP_norm_april_taxa_sig_plot_a12$Phylum))),
                     na.value = "black") +
  scale_fill_manual(values=distinctColorPalette(length(unique(CAP_norm_april_taxa_sig_plot_a12$Phylum))),
                    na.value = "black")  +
  scale_shape_manual(values=c(15, 17, 18, 4)) +
  guides(fill = FALSE,
         size = FALSE,
         alpha = FALSE,
         color = guide_legend(nrow = april_phyla/2),
         shape = guide_legend(nrow = april_phyla/2)) +
  labs(subtitle = "Using all SVs above 1% relative abundance",
       title= "April",
       x = CAP_april_xlabel,
       y = CAP_april_ylabel) + 
  theme(plot.title = element_text(hjust = 0.5),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 13, face = "bold"),
        legend.position = "right",
        legend.direction = "vertical",
        legend.box.margin = margin(100, 6, 6, 6))

#august
arrowmat_CAP_norm_taxa_august <- vegan::scores(CAP_norm_ps_cap_r_norm_august_f, display = "bp")
arrowmat_CAP_norm_taxa_august<-subset(arrowmat_CAP_norm_taxa_august,
                           rownames(arrowmat_CAP_norm_taxa_august) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_taxa_august <- data.frame(labels = rownames(arrowmat_CAP_norm_taxa_august), arrowmat_CAP_norm_taxa_august)
rownames(arrowmat_CAP_norm_taxa_august) <- c("Site ID","Temperature","EC","K","Na")
rownames(arrowdf_CAP_norm_taxa_august) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_taxa_august$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_taxa_august <- aes(xend = CAP1, yend = CAP2,
                     x = 0, y = 0,
                     shape = NULL, color = NULL,
                     label = labels)
label_map_CAP_norm_taxa_august <- aes(x = 1.1 * CAP1, y = 1.1 * CAP2,
                     shape = NULL, color = NULL,
                     label = labels)
arrowhead_CAP_norm_taxa_august = arrow(length = unit(0.02, "npc"))

august_phyla=nrow(as.data.frame(table(tax_table(ps_cap_r_norm_august_f)[, "Phylum"], exclude = NULL)))

CAP_labels_august <- plot_ordination(ps_cap_r_norm_august_f,
                                    ordination = CAP_norm_ps_cap_r_norm_august_f, 
                                    axes = c(1,2), type="taxa")
CAP_august_xlabel <- CAP_labels_august$labels$x
CAP_august_ylabel <- CAP_labels_august$labels$y

CAP_norm_august_taxa_sig_plot_a12 <- plot_ordination(ps_cap_r_norm_august_f,
                                    ordination = CAP_norm_ps_cap_r_norm_august_f, 
                                    axes = c(1,2), type="taxa", justDF = TRUE) 
CAP_norm_august_taxa_sig_plot_a12$Phylum=as.character(CAP_norm_august_taxa_sig_plot_a12$Phylum)
CAP_norm_august_taxa_sig_plot_a12$Class=as.character(CAP_norm_august_taxa_sig_plot_a12$Class)
ov_CAP_norm_august_taxa_sig_plot_a12<-subset(CAP_norm_august_taxa_sig_plot_a12,
                           Phylum != "Proteobacteria")
ov_CAP_norm_august_taxa_sig_plot_a12$Phylum=as.character(ov_CAP_norm_august_taxa_sig_plot_a12$Phylum)
class_ov_CAP_norm_august_taxa_sig_plot_a12<-subset(CAP_norm_august_taxa_sig_plot_a12,
                           Class %in% c("Epsilonproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria"))
class_ov_CAP_norm_august_taxa_sig_plot_a12$Class=as.character(class_ov_CAP_norm_august_taxa_sig_plot_a12$Class)

august_class <- length(unique(class_ov_CAP_norm_august_taxa_sig_plot_a12$Class))

final_CAP_norm_august_taxa_sig_plot_a12 = ggplot(data=CAP_norm_august_taxa_sig_plot_a12,
                                                aes(x=CAP1,y=CAP2)) +
  geom_point(data=ov_CAP_norm_august_taxa_sig_plot_a12,
             aes(x=CAP1, y=CAP2,
                 color = Phylum, size = 4)) +
  geom_point(data = class_ov_CAP_norm_august_taxa_sig_plot_a12,
             aes(x=CAP1, y=CAP2,
                 color = Phylum,
                 shape = Class,
                 alpha = 1, size = 4)) +
  geom_segment(mapping = arrow_map_CAP_norm_taxa_august, size = .5, 
               data = arrowdf_CAP_norm_taxa_august, 
               color = "black", arrow = arrowhead_CAP_norm_taxa_august) +
  geom_text(mapping = label_map_CAP_norm_taxa_august, 
            data = subset(arrowdf_CAP_norm_taxa_august,
                          labels %in% c("Site ID", "Temperature","EC","Na")),
            size = 5, fontface="bold",
            hjust = 0,
            show.legend = FALSE) +
  geom_text(mapping = label_map_CAP_norm_taxa_august, 
            data = subset(arrowdf_CAP_norm_taxa_august,
                          labels %in% c("K")),
            size = 5, fontface="bold",
            show.legend = FALSE,
            nudge_y = 0.05) +
  scale_color_manual(values=distinctColorPalette(length(unique(CAP_norm_august_taxa_sig_plot_a12$Phylum))),
                     na.value = "black") +
  scale_fill_manual(values=distinctColorPalette(length(unique(CAP_norm_august_taxa_sig_plot_a12$Phylum))),
                    na.value = "black")  +
  scale_shape_manual(values=c(15, 17, 18, 4)) +
  guides(fill = FALSE,
         color = guide_legend(nrow= august_phyla/2),
         size = FALSE,
         alpha = FALSE,
         shape = guide_legend(nrow = august_phyla/2)) +
  labs(title= "August",
       x = CAP_august_xlabel,
       y = CAP_august_ylabel) + 
  theme(plot.title = element_text(hjust = 0.5),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 13, face = "bold"),
        legend.position = "right",
        legend.direction = "vertical",
        legend.box.margin = margin(100, 6, 6, 6))

#december
arrowmat_CAP_norm_taxa_december <- vegan::scores(CAP_norm_ps_cap_r_norm_december_f, display = "bp")
arrowmat_CAP_norm_taxa_december<-subset(arrowmat_CAP_norm_taxa_december,
                           rownames(arrowmat_CAP_norm_taxa_december) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_taxa_december <- data.frame(labels = rownames(arrowmat_CAP_norm_taxa_december), arrowmat_CAP_norm_taxa_december)
rownames(arrowmat_CAP_norm_taxa_december) <- c("Site ID","Temperature","EC","K","Na")
rownames(arrowdf_CAP_norm_taxa_december) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_taxa_december$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_taxa_december <- aes(xend = CAP1, yend = CAP2,
                     x = 0, y = 0,
                     shape = NULL, color = NULL,
                     label = labels)
label_map_CAP_norm_taxa_december <- aes(x = 1.1 * CAP1, y = 1.1 * CAP2,
                     shape = NULL, color = NULL,
                     label = labels)
arrowhead_CAP_norm_taxa_december = arrow(length = unit(0.02, "npc"))

december_phyla=nrow(as.data.frame(table(tax_table(ps_cap_r_norm_december_f)[, "Phylum"], exclude = NULL)))

CAP_labels_december <- plot_ordination(ps_cap_r_norm_december_f,
                                    ordination = CAP_norm_ps_cap_r_norm_december_f, 
                                    axes = c(1,2), type="taxa")
CAP_december_xlabel <- CAP_labels_december$labels$x
CAP_december_ylabel <- CAP_labels_december$labels$y

CAP_norm_december_taxa_sig_plot_a12 <- plot_ordination(ps_cap_r_norm_december_f,
                                    ordination = CAP_norm_ps_cap_r_norm_december_f, 
                                    axes = c(1,2), type="taxa", justDF = TRUE) 
CAP_norm_december_taxa_sig_plot_a12$Phylum=as.character(CAP_norm_december_taxa_sig_plot_a12$Phylum)
CAP_norm_december_taxa_sig_plot_a12$Class=as.character(CAP_norm_december_taxa_sig_plot_a12$Class)
ov_CAP_norm_december_taxa_sig_plot_a12<-subset(CAP_norm_december_taxa_sig_plot_a12,
                           Phylum != "Proteobacteria")
ov_CAP_norm_december_taxa_sig_plot_a12$Phylum=as.character(ov_CAP_norm_december_taxa_sig_plot_a12$Phylum)
class_ov_CAP_norm_december_taxa_sig_plot_a12<-subset(CAP_norm_december_taxa_sig_plot_a12,
                           Class %in% c("Epsilonproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria"))
class_ov_CAP_norm_december_taxa_sig_plot_a12$Class=as.character(class_ov_CAP_norm_december_taxa_sig_plot_a12$Class)

december_class <- length(unique(class_ov_CAP_norm_december_taxa_sig_plot_a12$Class))

final_CAP_norm_december_taxa_sig_plot_a12 = ggplot(data=CAP_norm_december_taxa_sig_plot_a12,
                                                aes(x=CAP1,y=CAP2)) +
  geom_point(data=ov_CAP_norm_december_taxa_sig_plot_a12,
             aes(x=CAP1, y=CAP2,
                 color = Phylum, size = 4)) +
  geom_point(data = class_ov_CAP_norm_december_taxa_sig_plot_a12,
             aes(x=CAP1, y=CAP2,
                 color = Phylum,
                 shape = Class,
                 alpha = 1, size = 4)) +
  geom_segment(mapping = arrow_map_CAP_norm_taxa_december, size = .5, 
               data = arrowdf_CAP_norm_taxa_december, 
               color = "black", arrow = arrowhead_CAP_norm_taxa_december) +
  geom_text(mapping = label_map_CAP_norm_taxa_december,
            data = arrowdf_CAP_norm_taxa_december,
            size = 5, fontface="bold",
            show.legend = FALSE) +
  scale_color_manual(values=distinctColorPalette(length(unique(CAP_norm_december_taxa_sig_plot_a12$Phylum))),
                     na.value = "black") +
  scale_fill_manual(values=distinctColorPalette(length(unique(CAP_norm_december_taxa_sig_plot_a12$Phylum))),
                    na.value = "black")  +
  scale_shape_manual(values=c(15, 17, 18, 4)) +
  guides(fill = FALSE,
         color = guide_legend(nrow = december_phyla/2),
         size = FALSE,
         alpha = FALSE,
         shape = guide_legend(nrow = december_phyla/2)) +
  labs(title= "December",
       x = CAP_december_xlabel,
       y = CAP_december_ylabel) + 
  theme(plot.title = element_text(hjust = 0.5),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 13, face = "bold"),
        legend.position = "right",
        legend.direction = "vertical",
        legend.box.margin = margin(100, 6, 6, 6))

ps_cap_r_norm_Month_cat<-plot_grid(final_CAP_norm_april_taxa_sig_plot_a12, 
                                   final_CAP_norm_august_taxa_sig_plot_a12, 
                                   final_CAP_norm_december_taxa_sig_plot_a12,
                                   align = 'v', ncol = 1)
ps_cap_r_norm_Month_cat

It seems K, EC, Na and temperature influenced a subset of the microbial community mostly composed of Gamma- and Epsilonproteobacteria as well as Omnitrophica plus others across the year. Omintrophica and Parcubacteria seem to appear in different numbers across the year within this cluster.

So who defines this cluster? Let’s select the corresponding points of the CAP and figure out.

CAP_norm_cluster_april = subset(final_CAP_norm_april_taxa_sig_plot_a12$data,
                          CAP1 >= 0.75)
CAP_norm_cluster_august = subset(final_CAP_norm_august_taxa_sig_plot_a12$data,
                          CAP1 >= 0.75)
CAP_norm_cluster_december = subset(final_CAP_norm_december_taxa_sig_plot_a12$data,
                          CAP1 >= 0.75)

CAP_norm_cluster_april_names <- rownames(CAP_norm_cluster_april)
CAP_norm_cluster_august_names <- rownames(CAP_norm_cluster_august)
CAP_norm_cluster_december_names <- rownames(CAP_norm_cluster_december)

ps.b.r_april = subset_samples(ps.b.r, Month == "April")
ps.b.r_april_f = filter_taxa(ps.b.r_april, function(x) sum(x) > .01, TRUE)
ps.b.r_august = subset_samples(ps.b.r, Month == "August")
ps.b.r_august_f = filter_taxa(ps.b.r_august, function(x) sum(x) > .01, TRUE)
ps.b.r_december = subset_samples(ps.b.r, Month == "December")
ps.b.r_december_f = filter_taxa(ps.b.r_december, function(x) sum(x) > .01, TRUE)

ps.b.r_april_CAPcluster_taxa   <- prune_taxa(CAP_norm_cluster_april_names, ps.b.r_april_f)
ps.b.r_august_CAPcluster_taxa   <- prune_taxa(CAP_norm_cluster_august_names, ps.b.r_august_f)
ps.b.r_december_CAPcluster_taxa   <- prune_taxa(CAP_norm_cluster_december_names, ps.b.r_december_f)

What are the Proteobacteria-to-other-phyla proportions here?

sv.phyl.p.april<-as.data.frame(table(tax_table(ps.b.r_april_CAPcluster_taxa)[, "Phylum"], exclude = NULL))
sv.phyl.p.april.ord <- sv.phyl.p.april[order(-sv.phyl.p.april$Freq),] 
head(sv.phyl.p.april.ord, n=10)
##                         Var1 Freq
## 11            Proteobacteria   39
## 9               Omnitrophica    5
## 5              Cyanobacteria    2
## 7              Euryarchaeota    2
## 1               Acetothermia    1
## 2  Candidatus_Berkelbacteria    1
## 3                   Chlorobi    1
## 4                Chloroflexi    1
## 6              Elusimicrobia    1
## 8                Nitrospirae    1
sv.phyl.p.august<-as.data.frame(table(tax_table(ps.b.r_august_CAPcluster_taxa)[, "Phylum"], exclude = NULL))
sv.phyl.p.august.ord <- sv.phyl.p.august[order(-sv.phyl.p.august$Freq),] 
head(sv.phyl.p.august.ord, n=10)
##                Var1 Freq
## 10   Proteobacteria   38
## 9     Parcubacteria    4
## 2     Cyanobacteria    2
## 8      Omnitrophica    2
## 1     Bacteroidetes    1
## 3           FCPU426    1
## 4        Firmicutes    1
## 5  Gemmatimonadetes    1
## 6    Microgenomates    1
## 7       Nitrospirae    1
sv.phyl.p.december<-as.data.frame(table(tax_table(ps.b.r_december_CAPcluster_taxa)[, "Phylum"], exclude = NULL))
sv.phyl.p.december.ord <- sv.phyl.p.december[order(-sv.phyl.p.december$Freq),] 
head(sv.phyl.p.december.ord, n=10)
##               Var1 Freq
## 9   Proteobacteria   32
## 8    Parcubacteria    4
## 7     Omnitrophica    3
## 6      Nitrospirae    2
## 11            WWE3    2
## 1      Chloroflexi    1
## 2    Cloacimonetes    1
## 3    Euryarchaeota    1
## 4       Firmicutes    1
## 5  Latescibacteria    1
ps.b.r_april_CAPcluster_taxa.glom <- tax_glom(ps.b.r_april_CAPcluster_taxa, taxrank = 'Genus', NArm = FALSE)
ps.b.r_april_CAPcluster_taxa.glom.psdf <- data.table(psmelt(ps.b.r_april_CAPcluster_taxa.glom))
ps.b.r_april_CAPcluster_taxa.glom.psdf$Phylum <- as.character(ps.b.r_april_CAPcluster_taxa.glom.psdf$Phylum)
ps.b.r_april_CAPcluster_taxa.glom.psdf$Class <- as.character(ps.b.r_april_CAPcluster_taxa.glom.psdf$Class)
ps.b.r_april_CAPcluster_taxa.glom.psdf$Genus <- as.character(ps.b.r_april_CAPcluster_taxa.glom.psdf$Genus)
ps.b.r_april_CAPcluster_taxa.plot<-ggplot(ps.b.r_april_CAPcluster_taxa.glom.psdf[Abundance > 0], 
                                    aes(x = Site.name, y = Abundance/3, fill = Genus)) + 
  geom_bar(stat = "identity") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) + 
  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
  ylab("Relative Abundance\n") +
  scale_fill_manual(values=distinctColorPalette(length(unique(ps.b.r_april_CAPcluster_taxa.glom.psdf$Genus))),
                     na.value = "black")

ps.b.r_august_CAPcluster_taxa.glom <- tax_glom(ps.b.r_august_CAPcluster_taxa, taxrank = 'Genus', NArm = FALSE)
ps.b.r_august_CAPcluster_taxa.glom.psdf <- data.table(psmelt(ps.b.r_august_CAPcluster_taxa.glom))
ps.b.r_august_CAPcluster_taxa.glom.psdf$Phylum <- as.character(ps.b.r_august_CAPcluster_taxa.glom.psdf$Phylum)
ps.b.r_august_CAPcluster_taxa.glom.psdf$Class <- as.character(ps.b.r_august_CAPcluster_taxa.glom.psdf$Class)
ps.b.r_august_CAPcluster_taxa.glom.psdf$Genus <- as.character(ps.b.r_august_CAPcluster_taxa.glom.psdf$Genus)
ps.b.r_august_CAPcluster_taxa.plot<-ggplot(ps.b.r_august_CAPcluster_taxa.glom.psdf[Abundance > 0], 
                                    aes(x = Site.name, y = Abundance/3, fill = Genus)) + 
  geom_bar(stat = "identity") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) + 
  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
  ylab("Relative Abundance\n") +
  scale_fill_manual(values=distinctColorPalette(length(unique(ps.b.r_august_CAPcluster_taxa.glom.psdf$Genus))),
                     na.value = "black")

ps.b.r_december_CAPcluster_taxa.glom <- tax_glom(ps.b.r_december_CAPcluster_taxa, taxrank = 'Genus', NArm = FALSE)
ps.b.r_december_CAPcluster_taxa.glom.psdf <- data.table(psmelt(ps.b.r_december_CAPcluster_taxa.glom))
ps.b.r_december_CAPcluster_taxa.glom.psdf$Phylum <- as.character(ps.b.r_december_CAPcluster_taxa.glom.psdf$Phylum)
ps.b.r_december_CAPcluster_taxa.glom.psdf$Class <- as.character(ps.b.r_december_CAPcluster_taxa.glom.psdf$Class)
ps.b.r_december_CAPcluster_taxa.glom.psdf$Genus <- as.character(ps.b.r_december_CAPcluster_taxa.glom.psdf$Genus)
ps.b.r_december_CAPcluster_taxa.plot<-ggplot(ps.b.r_december_CAPcluster_taxa.glom.psdf[Abundance > 0], 
                                    aes(x = Site.name, y = Abundance/3, fill = Genus)) + 
  geom_bar(stat = "identity") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) + 
  guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
  ylab("Relative Abundance\n") +
  scale_fill_manual(values=distinctColorPalette(length(unique(ps.b.r_december_CAPcluster_taxa.glom.psdf$Genus))),
                     na.value = "black")

ps.b.r_CAPcluster_taxa_plot <- plot_grid(ps.b.r_april_CAPcluster_taxa.plot, 
                                         ps.b.r_august_CAPcluster_taxa.plot, 
                                         ps.b.r_december_CAPcluster_taxa.plot,
                                         align = 'v', ncol = 1)
ps.b.r_CAPcluster_taxa_plot

As expected, the genus-level analysis shows that the main Epsilonproteobacterial players are the ones modulating this shift showed by the CAP. Further, the Gammaproteobacteria noticed in that shifted cluster are represented by Thiotrix. In April, Omnitrophica follows Proteobacteria in SV presence, whilst in August and December it’s Parcubacteria, both of these in numbers ~8x smaller than Proteobacterial SVs.

Importantly, this cluster is, albeit not in huge amounts, driven by temperature, K, EC and Na, bit less in December. What can this mean mostly for these environmental variables? I can see how temperature might impact given that in Crumlin, Celynen and Six Bells are amongst the warmest sites. The first two also present the highest EC values in the same figure.

Ok then, I have no idea what to do at this point, passing on to other stuff.